import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.arima_model import ARIMA
import statsmodels.api as sm
from statsmodels.tsa.seasonal import seasonal_decompose
import warnings
warnings.filterwarnings('ignore')
from sklearn.metrics import accuracy_score
import seaborn as sns
######################################## Import Data From Quandl API ##########################################################
#import quandl
#import pandas as pd
#import xlsxwriter
##mydata = quandl.get_table('ZACKS/FC', ticker='AAPL')
#quandl.ApiConfig.api_key = "XXXXXXXXXXXXXX" ########## Register and generate a key ##############
#mydata = quandl.get("FRED/GDP")
#excelfile= r'dailyd2.xlsx'
#workbook = xlsxwriter.Workbook(excelfile)
#worksheet1 = workbook.add_worksheet('daily')
#bold = workbook.add_format({'bold': True})
#row = 0
#worksheet1.write(row,0,'Date')
#mydata = quandl.get('WIKI/AAPL', start_date="2017-01-01", end_date="2017-08-25")
#daterange = mydata.index ############# Save the date range ###############
#for i in daterange:
# row = row+1
# worksheet1.write(row,0,i)
#data = pd.read_csv(r'0825_quandl_ticks2.csv') ########### Load the list of tickers #############
#tlist = data['Ticker']
#col = 0
#for tl in tlist:
# try:
# mydata = quandl.get('WIKI/'+tl, start_date="2017-01-01", end_date="2017-08-25")
# if len(mydata)-1 == len(daterange):
# row = 0
# col = col+1
# worksheet1.write(row,col,tl,bold)
# for dr in daterange:
# row = row+1
# for i,j in mydata['Adj. Close'].iteritems():
# if dr==i:
# worksheet1.write(row,col,j)
# print(tl,j)
# except Exception as e:
# print(e)
#workbook.close()
######## Load Data #########
# Daily prices, one column per ticker, indexed by trading date.
data = pd.read_excel(r'C:\ModelData\dailyd2.xlsx', index_col='Date', parse_dates=True)
################ Calculate and plot volatility #####################
# Per-ticker dispersion stats; normalized_std (coefficient of variation,
# std/mean) makes volatility comparable across different price levels.
df_stat = pd.DataFrame({'std': data.std(),
                        'mean': data.mean(),
                        'normalized_std': data.std() / data.mean()})
# Original sort_values() call discarded its result (no-op); keep the frame sorted.
df_stat = df_stat.sort_values('normalized_std')
plt.title('Normalized Standard Deviation')
plt.hist(df_stat['normalized_std'])
fig = plt.gcf()
fig.set_size_inches(14.5, 5.5)
plt.show()
######### Based on the graph above filter out stocks with high variance and low returns#############
# Keep tickers whose coefficient of variation is below 15%.
df_fltr = df_stat[df_stat['normalized_std'] < .15]
df = data[df_fltr.index]
# Keep tickers with a total return of at least 25% over the window.
# .iloc replaces the deprecated positional Series[-1]/[0] indexing.
tkl = [x for x in df if (df[x].iloc[-1] - df[x].iloc[0]) / df[x].iloc[0] >= .25]
df = df[tkl]
fltr_tk = []
for tk in df:
    # OLS of price on a linear time index; .to_numpy() replaces the
    # .as_matrix() method that was removed in pandas 1.0.
    Y = df[tk].to_numpy()
    X = range(len(df.index))
    X = sm.add_constant(X)
    model = sm.OLS(Y, X)
    results = model.fit()
    if results.params[1] > .01: ########## Filter on slope coefficient from Regression Model #########
        fltr_tk.append(tk)
        df.plot(y=[tk])
        plt.title('Daily '+tk+' Stock Prices 2017-01-01 to 2017-07-31')
        plt.legend(loc='upper left')
        # Shade the hold-out window used later for forecast evaluation.
        plt.axvspan('2017-08-01','2017-08-25', color='green', alpha=0.25)
        fig = plt.gcf()
        fig.set_size_inches(16.5, 4.5)
        plt.show()
################ PLot Trend and Seasonality #########################
# Decompose each surviving ticker into trend and seasonal components and
# plot them side by side. seasonal_decompose needs a regular frequency,
# so the series is temporarily re-indexed to calendar days, then the
# original trading-day index is restored before plotting.
for ticker in fltr_tk:
    series = df[ticker]
    daily_index = pd.date_range(series.index[0], periods=len(df), freq='D')
    series.index = daily_index
    parts = seasonal_decompose(series)
    trend_part = parts.trend
    seasonal_part = parts.seasonal
    # Put every piece back on the trading-day index.
    for piece in (seasonal_part, trend_part, series):
        piece.index = df.index
    for pos, (component, label) in enumerate([(trend_part, 'Trend'),
                                              (seasonal_part, 'Seasonality')], start=1):
        plt.subplot(1, 2, pos)
        plt.plot(component, label=label)
        plt.title(ticker)
        plt.xticks(rotation=90)
        plt.legend(loc='best')
    fig = plt.gcf()
    fig.set_size_inches(16.5, 4.5)
    plt.show()
############### Run ARIMA Model ###################
# Grid-search ARIMA(p, d, q) orders per ticker (p, q in 0..9, d in 0..1),
# keeping the order with the lowest MAPE on the 20-day hold-out window.
store = {}
for tk in fltr_tk:
    train = df[tk][0:-20]
    test = df[tk][len(train):]
    # Best-so-far order / MAPE / forecast; 99 is a sentinel meaning "no fit yet".
    ap = 99
    ad = 99
    aq = 99
    amape = 99
    af = []
    for p in range(10):
        for q in range(10):
            for d in range(2):
                try:
                    model = ARIMA(train, order=(p, d, q)).fit()
                    # Legacy statsmodels ARIMA API: forecast() returns
                    # (point_forecast, stderr, conf_int).
                    fcst = model.forecast(len(test))[0]
                except Exception:
                    # Many orders fail to converge or are non-invertible;
                    # skip them instead of the original bare `except: pass`,
                    # which also masked stale `fcst` values from prior loops.
                    continue
                # Mean absolute percentage error over the hold-out window
                # (vectorized; replaces deprecated positional test[i] access).
                actual = test.to_numpy()
                mape = round(np.mean(np.abs(actual - fcst) / actual) * 100, 2)
                if amape > mape:
                    amape = mape
                    ap = p
                    ad = d
                    aq = q
                    af = fcst
    store[tk] = af
    plt.plot(train)
    plt.plot(test,label='Actual')
    if len(af):  # guard: no forecast line when every order failed to fit
        plt.plot(test.index,af,label='Predicted')
    fig = plt.gcf()
    fig.set_size_inches(16.5, 4.5)
    plt.title(str(tk)+"_"+"MAPE"+"_"+str(amape)+"_"+"Order"+"_"+"("+str(ap)+str(ad)+str(aq)+")")
    plt.legend(loc='best')
    plt.show()
###### Correlation HeatMap #######
# Absolute pairwise Pearson correlations among the selected tickers;
# low (dark) cells mark better diversification candidates.
corr = df[fltr_tk].corr(method='pearson', min_periods=1).abs()
ax = sns.heatmap(corr)
fig = plt.gcf()
fig.set_size_inches(18.5, 10.5)
ax.set_title('PortFolio Diversity HeatMap')
plt.show()
##### Test model accuracy #####
# Direction-of-move scorecard per ticker: 1 if the last actual/predicted
# price is above the last training price, else 0, plus the corresponding
# growth rates. The original four-branch if/elif reduced to exactly these
# two comparisons (the Growth lines were identical in every branch).
eval_metrcs = pd.DataFrame(columns = ['Actual','Predicted','Actual_Growth','Predicted_Growth'],index = fltr_tk)
for tk in fltr_tk:
    train = df[tk][0:-20]
    test = df[tk][len(train):]
    last_train = train.iloc[-1]   # .iloc replaces deprecated positional [-1]
    last_actual = test.iloc[-1]
    last_pred = store[tk][-1]
    # .loc[row, col] writes in a single step, avoiding the original
    # chained assignment (df['col'].loc[mask] = v) which can silently
    # write to a copy.
    eval_metrcs.loc[tk, 'Actual'] = int(last_actual > last_train)
    eval_metrcs.loc[tk, 'Predicted'] = int(last_pred > last_train)
    eval_metrcs.loc[tk, 'Actual_Growth'] = (last_actual - last_train) / last_train
    eval_metrcs.loc[tk, 'Predicted_Growth'] = (last_pred - last_train) / last_train
# The originals were bare expressions (notebook leftovers) with no effect
# in a script; print so the results are actually shown.
print(eval_metrcs)
model_accuracy = accuracy_score(eval_metrcs['Actual'].astype(int),eval_metrcs['Predicted'].astype(int))
print(model_accuracy)